# Introduction -----

#In this script we will produce tables and statistics to accompany figure 1 of the manuscript, which contains:

# Demographics of the study population
# Psychiatric data from CAPA
# Behavioural task data
# Hypnographic data


# Set up ----

## Load packages and metadata ---------

#Load packages
pacman::p_load(tidyverse,patchwork,ghibli,eegUtils,
               emmeans,lme4,lmerTest,broom.mixed,
               survival,survminer,gtsummary)


#Get our common settings and functions
source("./eLife Submission Scripts/Analysis-Common-Utilities.R")


# Table 1 =====

## Table 1 Top Half =====

# Descriptive table of demographic, IQ and psychiatric variables, split by
# genotype group. Rows with a missing age at EEG are excluded. The p-values
# added here are placeholders (t-test / Fisher's exact test).
meta |>
  drop_na(age_eeg) |>
  select(
    group,
    `Age @ EEG` = age_eeg,
    Sex = gender,
    pesall, sleepprobsall, fsiqall,
    anyanxsymsall, adhdsymsall, asqtotalsymsall
  ) |>
  # Recode psychotic experiences (coded 1 when present) into a labelled factor
  mutate(pesall = factor(ifelse(pesall == 1, "PE", "No PE"))) |>
  tbl_summary(
    by = group,
    # Force the sleep problem score to be summarised as continuous
    type = sleepprobsall ~ "continuous",
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) |>
  add_p(
    test = list(
      all_continuous() ~ "t.test",
      all_categorical() ~ "fisher.test"
    )
  ) |>
  modify_header(label = "**Variable**") |>
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Genotype**") |>
  bold_labels()

# Note: the p-values above will be replaced with mixed-model statistics that
# account for family. They are included here only so the table has the right shape.

### Table 1 Stats =====

# Linear model for age at EEG by genotype group, restricted to rows with a
# non-missing age at EEG.
# NOTE(review): unlike the other Table 1 models this one has no (1|family)
# random intercept - confirm that this is intentional.
m0 <- lm(
  age_eeg ~ group,
  data = meta |>
    select(group, age_eeg, fsiqall) |>
    drop_na(age_eeg)
)

# Keep only the genotype coefficient and tag the row with the variable name so
# that it can be stacked with the other Table 1 statistics.
t0 <-
  tidy(m0, conf.int = TRUE) |>
  filter(term == "group22q") |>
  mutate(name = "age_eeg") |>
  relocate(name)

# Linear mixed model for full-scale IQ by genotype group, with a random
# intercept for family. Restricted to rows with a non-missing age at EEG.
m1 <- lmer(
  fsiqall ~ group + (1 | family),
  data = meta |>
    select(group, family, age_eeg, fsiqall) |>
    drop_na(age_eeg)
)

# Fixed-effect estimate for genotype with Kenward-Roger degrees of freedom.
# Argument names are spelled out in full rather than relying on partial
# matching (`effect` -> `effects`, `ddf` -> `ddf.method`).
t1 <-
  tidy(m1, effects = "fixed", conf.int = TRUE,
       ddf.method = "Kenward-Roger") |>
  filter(term == "group22q") |>
  mutate(name = "fsiqall") |>
  relocate(name) |>
  select(-effect)

# Poisson mixed models for the symptom counts: one model per symptom variable,
# each with genotype as a fixed effect and a random intercept for family.
# Coefficients are exponentiated, so the estimates are rate ratios.
# The Kenward-Roger degrees-of-freedom request was dropped from tidy(): it is
# only meaningful for linear mixed models fitted through lmerTest, not for
# glmer() fits.
t2 <-
  meta |>
  select(group, age_eeg, family,
         sleepprobsall, anyanxsymsall, adhdsymsall, asqtotalsymsall) |>
  drop_na(age_eeg) |>
  # Long format: one row per participant per symptom measure
  pivot_longer(-c(group:family)) |>
  group_by(name) |>
  nest() |>
  mutate(
    models = map(data, \(d) glmer(value ~ group + (1 | family),
                                  data = d,
                                  family = poisson(link = "log"))),
    tabs = map(models, \(m) tidy(m, effects = "fixed", conf.int = TRUE,
                                 exponentiate = TRUE) |>
                 filter(term == "group22q"))
  ) |>
  unnest(tabs) |>
  # `name` is retained automatically because it is the grouping variable
  select(term:conf.high)

# Chi-squared test of gender by genotype group, computed on the group x gender
# count table for rows with a non-missing age at EEG.
m3 <-
  meta |>
  drop_na(age_eeg) |>
  count(group, gender) |>
  # One column of counts per group, one row per gender
  pivot_wider(names_from = "group", values_from = "n") |>
  select(-gender) |>
  chisq.test() |>
  tidy()

# Reshape the test output so it lines up with the model tables: the test
# statistic is reported as the estimate, and there is no SE or interval.
t3 <-
  m3 |>
  mutate(
    name = "gender",
    term = "group",
    estimate = statistic,
    std.error = NA,
    conf.low = NA,
    conf.high = NA
  ) |>
  select(-method)


# Fisher's exact test of psychotic experiences (PE / No PE) by genotype group,
# computed on the count table for rows with a non-missing age at EEG.
m4 <-
  meta |>
  drop_na(age_eeg) |>
  mutate(pesall = factor(ifelse(pesall == 1, "PE", "No PE"))) |>
  count(group, pesall) |>
  # One column of counts per group, one row per PE status
  pivot_wider(names_from = "group", values_from = "n") |>
  select(-pesall) |>
  fisher.test() |>
  tidy()

# Reshape the test output so it lines up with the model tables; the estimate
# reported by the test is kept, SE and test statistic are not available.
t4 <-
  m4 |>
  mutate(
    name = "pesall",
    term = "group",
    std.error = NA,
    statistic = NA
  ) |>
  select(-c(method, alternative))


# Combine the individual statistics into one table.
# Row order follows the bind_rows() call: age (t0), gender (t3), IQ (t1),
# the Poisson symptom-count models (t2), then psychotic experiences (t4).
# The Poisson rows are exponentiated log-link coefficients, i.e. rate ratios,
# so they are labelled as such; only the Fisher's exact test row is an odds
# ratio. The number of Poisson labels is taken from nrow(t2) rather than being
# hard-coded.
bind_rows(t0, t3, t1, t2, t4) |>
  select(-c(term, std.error, statistic, df)) |>
  mutate(statistic = c("Group Difference", "Chi-Squared", "Group Difference",
                       rep("Rate Ratio", nrow(t2)),
                       "Odds Ratio")) |>
  mutate(across(where(is.double), \(x) round(x, digits = 3))) |>
  transmute(name, statistic,
            estimate = paste0(estimate, " [", conf.low, ", ", conf.high, "]"),
            p.value) |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)


## Table 1 Bottom Half =====

# Descriptive table of the hypnogram-derived sleep measures, split by genotype
# group, for rows with a non-missing age at EEG. The p-values added here are
# placeholders (t-test / chi-squared test).
meta |>
  drop_na(age_eeg) |>
  select(
    group,
    percnightn1_eeg, percnightn2_eeg, percnightn3_eeg, percnightrem_eeg,
    latencyn1_eeg, latencyrem_eeg,
    se_eeg, tst_eeg, waso_eeg
  ) |>
  tbl_summary(
    by = group,
    statistic = list(all_continuous() ~ "{mean} ({sd})")
  ) |>
  add_p(
    test = list(
      all_continuous() ~ "t.test",
      all_categorical() ~ "chisq.test"
    )
  ) |>
  modify_header(label = "**Variable**") |>
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Genotype**") |>
  bold_labels()

# Replace the tbl_summary statistics above with mixed models: one linear mixed
# model per sleep measure, with genotype, gender and age at EEG as fixed
# effects and a random intercept for family.
m_hypno <-
  meta |>
  drop_na(age_eeg) |>
  select(group, subject, family, gender, age_eeg,
         percnightn1_eeg, percnightn2_eeg, percnightn3_eeg, percnightrem_eeg,
         latencyn1_eeg, latencyrem_eeg, se_eeg, tst_eeg, waso_eeg) |>
  # Long format: one row per participant per sleep measure
  pivot_longer(-c(group:age_eeg), names_to = "measure") |>
  group_by(measure) |>
  nest() |>
  mutate(
    models = map(data, \(d) lmer(value ~ group + gender + age_eeg + (1 | family),
                                 data = d)),
    # Fixed effects with Kenward-Roger degrees of freedom
    tidy_d = map(models, \(m) tidy(m, effects = "fixed", conf.int = TRUE,
                                   ddf.method = "Kenward-Roger"))
  )


# Present the genotype effect for each measure as "estimate [low, high]"
m_hypno |>
  unnest(tidy_d) |>
  filter(term == "group22q") |>
  select(measure, estimate:conf.high) |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    Difference = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(measure, Difference, p.value) |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)


#And we will fuse these tables in excel to make the paper final version


# Table 2 =====

# Table 2 does not require any computation so is added to the manuscript directly in word

# Table 3: Adjusted sleep model =====

# Adjusted model of sleep problems.
# Marianne suggested socio-economic status, and we can use the psychiatric
# symptoms too. This is a Poisson mixed model with a random intercept for
# family, fitted to complete cases of the selected columns only (drop_na()
# with no arguments).
m_sleep <-
  meta %>%
  select(group, family, income, gender, age_eeg,
         sleepprobsall, anyanxsymsall, adhdsymsall, asqtotalsymsall, pesall) %>%
  drop_na() %>%
  mutate(
    income = factor(income),
    # "No PE" is the reference level
    pesall = factor(ifelse(pesall == 1, "PE", "No PE"),
                    levels = c("No PE", "PE"))
  ) %>%
  glmer(sleepprobsall ~ group + gender + age_eeg + income + anyanxsymsall +
          adhdsymsall + asqtotalsymsall + pesall + (1 | family),
        data = .,
        family = poisson(link = "log"))

# GGally::ggcoef_model(m_sleep)

# Present the exponentiated fixed effects as "estimate [low, high]"
m_sleep |>
  tidy(exponentiate = TRUE, conf.int = TRUE, effects = "fixed") |>
  filter(term != "(Intercept)") |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    estimate = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(term, estimate, p.value) |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)


#Table 4: Behavioural Data ------

## Evening cycles to criterion model #####

# Data for the Cox proportional hazards regression: rows with a non-missing age
# at EEG, plus an event indicator that is 1 when `rep2Crit_evening` is recorded
# and 0 when it is missing.
d_surv <-
  meta |>
  drop_na(age_eeg) |>
  mutate(reachCrit = as.numeric(!is.na(rep2Crit_evening)))

# Sample size per group among rows with a recorded `maxCyc_evening`
d_surv |>
  drop_na(maxCyc_evening) |>
  count(group)

# Multilevel survival model: genotype, gender and age at EEG as fixed effects,
# with a random intercept for family
m4m <- coxme::coxme(
  Surv(maxCyc_evening, reachCrit) ~ group + gender + age_eeg + (1 | family),
  data = d_surv
)

# Extract a coefficient table from a fitted coxme model.
#
# Args:
#   mod: a fitted model exposing `$coefficients` (named fixed-effect
#        coefficients) and `$var` (variance matrix whose trailing
#        rows/columns correspond to the fixed effects), and for which
#        confint() returns columns named "2.5 %" and "97.5 %".
#
# Returns:
#   A tibble with one row per fixed effect and columns `term`, `beta`
#   (log-scale coefficient), `z` (Wald z, rounded to 2 dp), `p.value`
#   (Wald chi-squared test on 1 df, 2 significant figures), `conf.low` and
#   `conf.high` (log scale).
extract_coxme_table <- function(mod) {

  # The variance matrix holds the random-effect terms first, so the
  # fixed-effect variances are the last `nvar` diagonal entries.
  nvar   <- length(mod$coefficients)
  nfrail <- nrow(mod$var) - nvar
  se     <- sqrt(diag(mod$var)[nfrail + seq_len(nvar)])

  coef_tbl <-
    mod$coefficients |>
    as_tibble(rownames = "term") |>
    rename(beta = value) |>
    mutate(
      z = round(beta / se, 2),
      # Upper tail computed directly: 1 - pchisq() would underflow to exactly 0
      # for very large test statistics.
      p.value = signif(pchisq((beta / se)^2, df = 1, lower.tail = FALSE), 2)
    )

  ci_tbl <-
    confint(mod) |>
    as_tibble(rownames = "term") |>
    rename(conf.low = `2.5 %`,
           conf.high = `97.5 %`)

  left_join(coef_tbl, ci_tbl, by = "term")
}

# Prepare a table for presentation: exponentiate the log-scale coefficients and
# confidence limits to hazard ratios, round, and format as "HR [low, high]".
extract_coxme_table(m4m) |>
  mutate(
    across(c(beta, conf.low, conf.high), exp),
    across(where(is.double), \(x) round(x, digits = 3)),
    HR = paste0(beta, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(term, HR, p.value) |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)


## 22q Adjusted model =====

# Fit a Cox model to the 22q group only, to investigate the relationship
# between IQ, psychiatric symptoms and evening task performance. This is a
# plain coxph() with no family random effect.
m4_adj_22q <-
  coxph(
    Surv(maxCyc_evening, reachCrit) ~ gender + age_eeg + fsiqall +
      anyanxsymsall + adhdsymsall + asqtotalsymsall + pesall,
    data = d_surv |>
      filter(group == "22q") |>
      # Treat psychotic experiences as a factor with 0 as the reference level
      mutate(pesall = factor(pesall, levels = c(0, 1)))
  )


# Present hazard ratios as "HR [low, high]"
m4_adj_22q |>
  tidy(conf.int = TRUE, exponentiate = TRUE) |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    HR = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(term, HR, p.value) |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)


## Hits in the morning #####

# Hits are trials from a binomial (there were 15 trials in total in the
# morning session)

# How many participants did the morning task?
meta |>
  drop_na(age_eeg, hits_morning) |>
  count(group)

# Binomial mixed model of hits vs misses out of 15 trials, with genotype, age
# at EEG and gender as fixed effects and a random intercept for family.
# Fitted to complete cases of the selected columns.
m_hits <-
  meta %>%
  select(group, family, gender, age_eeg, hits_morning) %>%
  drop_na() %>%
  glmer(cbind(hits_morning, 15 - hits_morning) ~ group + age_eeg + gender +
          (1 | family),
        data = .,
        family = binomial(link = "logit"))


# Tidy model output into a table of odds ratios for presentation.
# The previous `ddf = "Kenward-Rogers"` argument was removed: the method name
# was misspelled, and Kenward-Roger degrees of freedom apply to linear mixed
# models rather than to this glmer() fit.
tidy(m_hits, exponentiate = TRUE, conf.int = TRUE, effects = "fixed") |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    OR = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(term, OR, p.value) |>
  filter(term != "(Intercept)") |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)

## 22q adjusted morning model =====


# Fit a binomial GLM to 22q participants only, to look at the relationship
# between morning hits and IQ / psychiatric symptoms. Complete cases of the
# selected columns are taken before filtering to the 22q group.
m_hits_adj_22q <-
  meta %>%
  select(group, family, gender, age_eeg, hits_morning,
         fsiqall, anyanxsymsall, adhdsymsall, asqtotalsymsall, pesall) %>%
  drop_na() %>%
  filter(group == "22q") %>%
  # Treat psychotic experiences as a factor with 0 as the reference level
  mutate(pesall = factor(pesall, levels = c(0, 1))) %>%
  glm(cbind(hits_morning, 15 - hits_morning) ~ age_eeg + gender + fsiqall +
        anyanxsymsall + adhdsymsall + asqtotalsymsall + pesall,
      data = .,
      family = binomial(link = "logit"))

# Present odds ratios as "OR [low, high]"
tidy(m_hits_adj_22q, exponentiate = TRUE, conf.int = TRUE) |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    OR = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(term, OR, p.value) |>
  filter(term != "(Intercept)") |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)


## Performance difference from morning to evening =====

# The reviewers asked for the change in performance from evening to morning.
# For subjects who did not meet criterion we presumably just take their last
# test round accuracy - NOTE(review): confirm how accC_evening is defined.

# Sample size per group with both accuracy measures available
meta |>
  drop_na(age_eeg, accC_evening, accC_morning) |>
  count(group)

# Linear mixed model of the morning-minus-evening difference, with genotype,
# age at EEG and gender as fixed effects and a random intercept for family.
m_diff <-
  meta %>%
  select(group, family, gender, age_eeg, accC_evening, accC_morning) %>%
  drop_na() %>%
  mutate(
    # Accuracy difference rescaled by 15/100 and rounded to a whole number
    hit_diff = round(((accC_morning - accC_evening) * 15) / 100, digits = 0),
    # Raw accuracy difference (not used by the model below)
    acc_diff = accC_morning - accC_evening
  ) %>%
  lmer(hit_diff ~ group + age_eeg + gender + (1 | family), data = .)

# Calculate post-hoc tests using estimated marginal means
m_diff_emm <- emmeans(m_diff, pairwise ~ group, infer = TRUE,
                      adjust = "bonferroni")

m1.posthoc <-
  m_diff_emm$contrasts |>
  as_tibble() |>
  mutate(across(where(is.double), \(x) round(x, digits = 3)))



# Tidy model output into a table for presentation (coefficients are left on
# the original scale, with Kenward-Roger degrees of freedom)
tidy(m_diff, exponentiate = FALSE, conf.int = TRUE,
     ddf.method = "Kenward-Roger", effects = "fixed") |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    beta = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(term, beta, p.value) |>
  filter(term != "(Intercept)") |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)



#And then we assemble these subcomponents into the final table in excel


#Table 5: Sleep Architecture Adjusted Models =====

# We can investigate whether subjective sleep problems - and other psychiatric
# characteristics - correlate with our PSG hypnographic measures.
# One linear mixed model per sleep measure, as for m_hypno, but additionally
# adjusted for the sleep problem score.
m_hypno_adj_sleep <-
  meta |>
  drop_na(age_eeg) |>
  select(group, subject, family, gender, age_eeg,
         sleepprobsall, fsiqall, anyanxsymsall, adhdsymsall, asqtotalsymsall,
         percnightn1_eeg, percnightn2_eeg, percnightn3_eeg, percnightrem_eeg,
         latencyn1_eeg, latencyrem_eeg, se_eeg, tst_eeg, waso_eeg) |>
  # Long format: one row per participant per sleep measure
  pivot_longer(-c(group:asqtotalsymsall), names_to = "measure") |>
  group_by(measure) |>
  nest() |>
  mutate(
    models = map(data, \(d) lmer(value ~ group + gender + age_eeg +
                                   sleepprobsall + (1 | family),
                                 data = d)),
    # Fixed effects with Kenward-Roger degrees of freedom
    tidy_d = map(models, \(m) tidy(m, effects = "fixed", conf.int = TRUE,
                                   ddf.method = "Kenward-Roger"))
  )

# Present the adjusted genotype effect for each measure
m_hypno_adj_sleep |>
  unnest(tidy_d) |>
  filter(term == "group22q") |>
  select(measure, estimate:conf.high) |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    Difference = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(measure, Difference, p.value) |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)

#So adjusting for CAPA sleep problems does not change the genotype effects


# What about psychiatric measures?
# Within the 22q group only: one ordinary linear model per sleep measure,
# regressed on gender, age at EEG, sleep problems, IQ and the psychiatric
# symptom scores.
m_hypno_adj_22q <-
  meta |>
  drop_na(age_eeg) |>
  filter(group == "22q") |>
  select(family, gender, age_eeg,
         sleepprobsall, fsiqall, anyanxsymsall, adhdsymsall, asqtotalsymsall,
         pesall,
         percnightn1_eeg, percnightn2_eeg, percnightn3_eeg, percnightrem_eeg,
         latencyn1_eeg, latencyrem_eeg, se_eeg, tst_eeg, waso_eeg) |>
  # Long format: one row per participant per sleep measure
  pivot_longer(-c(family:pesall), names_to = "measure") |>
  group_by(measure) |>
  nest() |>
  mutate(
    models = map(data, \(d) lm(value ~ gender + age_eeg + sleepprobsall +
                                 fsiqall + anyanxsymsall + adhdsymsall +
                                 asqtotalsymsall + pesall,
                               data = d)),
    tidy_d = map(models, \(m) tidy(m, conf.int = TRUE))
  )


#No need for the multilevel model as all 22q are from different families

## Supplementary Table 3 ====
# Coefficient table for the 22q-only sleep models. P-values are corrected with
# the Benjamini-Hochberg method separately for each predictor term, i.e.
# across the sleep measures that share that term.
m_hypno_adj_22q |>
  ungroup() |>
  unnest(tidy_d) |>
  filter(term != "(Intercept)") |>
  group_by(term) |>
  nest() |>
  mutate(p_corr = map(data, \(d) p.adjust(d$p.value, method = "BH"))) |>
  unnest(c(data, p_corr)) |>
  mutate(
    across(where(is.double), \(x) round(x, digits = 3)),
    Difference = paste0(estimate, " [", conf.low, ", ", conf.high, "]")
  ) |>
  select(measure, term, Difference, p_corr) |>
  knitr::kable(format = "html", booktabs = TRUE) |>
  kableExtra::kable_styling(font_size = 11)
